# Replication code for Ban and Goldstein
# May 9, 2012


# corruptiondata_aer.dta is the dataset from Ferran and Finaz (2011)
# made available on the American Economic Review website here:
# http://www.aeaweb.org/articles.php?doi=10.1257/aer.101.4.1274

# "F&F" below refers to the paper by Ferran and Finaz (2011)

#################################################
#### Generate Table 1 (Replicate F&F Table 4)####
#################################################

require(foreign)
require(car)
table4 <- read.dta("C:/Users/Becca/Desktop/Spring 2012 Courses/Gov 2001/Replication paper/Ferraz and Finan/corruptiondata_aer.dta")
#table4 <- read.dta("/Users/Pam/Documents/Gov2001/Gov2001paper/Ferraz_Finan_data/corruptiondata_aer.dta")
esample2 <- table4[table4$esample2==1,]
attach(esample2)

# Column 1
col1 <- lm(pcorrupt~first)
summary.lm(col1)
var.cov <- hccm(col1, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 2
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
col2 <- lm(pcorrupt~first+pref_masc+pref_idade_tse+party_d1+party_d3+party_d4+party_d5+party_d6+party_d7+party_d8+party_d9+party_d10+party_d11+party_d12+party_d13+party_d14+party_d15+party_d16+party_d17+party_d18)
summary(col2)
var.cov <- hccm(col2, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 3
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
col3 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans)
summary(col3)
# lrec_trans is the log of current transfers received (rec_transf_correntes in the dataset)
var.cov <- hccm(col3, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 4
col4 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca)
summary(col4)
var.cov <- hccm(col4, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 5
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col5 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca + sorteio)
summary(col5)
# Will not assess coeffs for sorteio10 because of singularity with sorteio1. Stata omits sorteio1 for this reason.
var.cov <- hccm(col5, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 6 -- includes fixed effects
col6 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary(col6)
var.cov <- hccm(col6, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 7
uf_d <- cbind(uf_d1, uf_d2, uf_d3, uf_d4, uf_d5, uf_d6, uf_d7, uf_d8, uf_d9, uf_d10, uf_d11, uf_d12, uf_d13, uf_d14, uf_d15, uf_d16, uf_d17, uf_d18, uf_d19, uf_d20, uf_d21, uf_d22, uf_d23, uf_d24, uf_d25, uf_d26)     
require(rgenoud)
require(Matching)
Y <- pcorrupt
X <- cbind(pref_masc,pref_idade_tse,party_d1, party_d3, party_d4, party_d5, party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18,lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea,lrec_trans,lrec_fisc,p_cad_pref,vereador_eleit,ENLP2000,comarca,sorteio, factor(uf))
X <- as.matrix(X)
Tr <- first
col7 <- Match(Y, Tr, X, Z = X, M=3, BiasAdjust=TRUE, Var.calc = 3)
summary(col7)

# Column 8
table4 <- read.dta("C:/Users/Becca/Desktop/Spring 2012 Courses/Gov 2001/Replication paper/Ferraz and Finan/corruptiondata_aer.dta")
#table4 <-read.dta("/Users/Pam/Documents/Gov2001/Gov2001paper/Ferraz_Finan_data/corruptiondata_aer.dta")
esample2 <- table4[table4$esample2==1,]
#require(Zelig)
#col8 <- zelig(esample2$pcorrupt ~ first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+uf, below = 0, above = Inf, model = "tobit", data=esample2)
#summary(col8)

#require(VGAM)
#col8 <- vglm(Y~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+uf, tobit(Lower=0, Upper=Inf))

attach(esample2)
require(AER)
col8 <- tobit(Y ~ first + prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
require(sandwich)
bread <-vcov(col8)
est.fun <- estfun(col8)
meat <- t(est.fun)%*%est.fun
sandwich <- bread%*%meat%*%bread
library(lmtest)
coeftest(col8, sandwich)

##################################################
#### Generate Table 2 (Replicate F&F Table 6) ####
##################################################

library(foreign)
corruptiondata <- read.dta("C:/Users/Becca/Desktop/Spring 2012 Courses/Gov 2001/Replication paper/Ferraz and Finan/corruptiondata_aer.dta")
#corruptiondata<-read.dta("/Users/Pam/Documents/Gov2001/Gov2001paper/Ferraz_Finan_data/corruptiondata_aer.dta")
attach(corruptiondata)

wm<-rep(0,476)
for(i in 1:476){
	if(reeleito[i]==1){wm[i]<-winmargin2000[i]}
	if(incumbent[i]==1){wm[i]<-winmargin2000_inclost[i]}
}

running<-wm
for(i in 1:476){
	if(incumbent[i]==1){running[i]<- -wm[i]}
}
running2<-running^2
running3<-running^3
running4<-running^4
spline1<-first*running
spline2<-first*running2
spline3<-first*running3
spline4<-first*running4

table6<-cbind(corruptiondata,running,running2,running3,running4,spline1,spline2,spline4)
table6<-table6[table6$esample==1,]

table6.1<-table6[table6$running!="NA",]
attach(table6.1)
prefchar2<-cbind(pref_masc,pref_idade_tse,pref_escola,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar2<-cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio<-cbind(sorteio1,sorteio2,sorteio3,sorteio4,sorteio5,sorteio6,sorteio7,sorteio8,sorteio9,sorteio10)

col1<-lm(pcorrupt~first+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
# Intercept: -.028
var.cov <- hccm(col1, "hc1")
ses <- sqrt(diag(var.cov))
ses
# SE: .011**
# R2: .14

attach(table6)
prefchar2<-cbind(pref_masc,pref_idade_tse,pref_escola,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar2<-cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio<-cbind(sorteio1,sorteio2,sorteio3,sorteio4,sorteio5,sorteio6,sorteio7,sorteio8,sorteio9,sorteio10)

col2<-lm(pcorrupt~first+running+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col2)
var.cov <- hccm(col2, "hc1")
ses <- sqrt(diag(var.cov))
ses

col3<-lm(pcorrupt~first+running+running2+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col3)
var.cov <- hccm(col3, "hc1")
ses <- sqrt(diag(var.cov))
ses

col4<-lm(pcorrupt~first+running+running2+running3+prefchar2+munichar2+lrec_trans +p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col4)
var.cov <- hccm(col4, "hc1")
ses <- sqrt(diag(var.cov))
ses

col5<-lm(pcorrupt~first+running+spline1+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col5)
var.cov <- hccm(col5, "hc1")
ses <- sqrt(diag(var.cov))
ses

col6<-lm(pcorrupt~first+running+spline1+running2+spline2+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col6)
var.cov <- hccm(col6, "hc1")
ses <- sqrt(diag(var.cov))
ses

col7<-lm(pcorrupt~first+running+spline1+running2+running3+spline2+spline3+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col7)
var.cov <- hccm(col7, "hc1")
ses <- sqrt(diag(var.cov))
ses

### Generate Table 3 ####
### First, get the imbalance of their matching estimator in their Table 4

require(cem)
# Column 4.7
uf_d <- cbind(uf_d1, uf_d2, uf_d3, uf_d4, uf_d5, uf_d6, uf_d7, uf_d8, uf_d9, uf_d10, uf_d11, uf_d12, uf_d13, uf_d14, uf_d15, uf_d16, uf_d17, uf_d18, uf_d19, uf_d20, uf_d21, uf_d22, uf_d23, uf_d24, uf_d25, uf_d26)     
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
require(rgenoud)
require(Matching)
Y <- pcorrupt
X <- cbind(pref_masc,pref_idade_tse,party_d1, party_d3, party_d4, party_d5, party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18,lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea,lrec_trans,lrec_fisc,p_cad_pref,vereador_eleit,ENLP2000,comarca, sorteio, factor(uf))
X <- as.matrix(X)
Tr <- first
col7.improved <- Match(Y, Tr, X, Z = X, M=1, BiasAdjust=TRUE, Var.calc = 1)
summary(col7.improved)
col7.data <- as.data.frame(col7.improved$mdata)
col7.D <- col7.data$Tr
imbalance(group=col7.data$Tr,data=col7.data,drop=c("Tr","Y", "orig.weighted.treated.nobs"))

### Now, our own CEM match
## CEM
require(foreign)
data <- read.dta("C:/Users/Becca/Desktop/Spring 2012 Courses/Gov 2001/Replication paper/Ferraz and Finan/corruptiondata_aer.dta")
esample2 <- data[data$esample2==1,]
attach(esample2)
library(MatchIt)
library(cem)
library(Zelig)
D <- first
Y <- pcorrupt
nsorteio.fac <- as.factor(nsorteio)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio, nsorteio.fac))

imbalance(group=D, data=balance, drop=c("Y","D"))

original.match <- cem(treatment="D", data=balance, drop="Y")
original.match
est.orig <- att(original.match, Y~D, data=balance)
est.orig

## For reference: nnmatching on the six "crucial" variables
D <- first
Y <- pcorrupt
nsorteio.fac <- as.factor(nsorteio)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio, nsorteio.fac))
attach(balance)
lrec <- log(rec_transf_correntes)
require(VGAM)
prop.scores <- glm(D~pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio.fac, family = binomial(link = "logit"), data=balance)
nnmatch <- matchit(formula=D~prop.scores$fitted,data=balance,method="nearest",distance=prop.scores$fitted, discard="control")
summary(nnmatch)
nnmatch.data <- match.data(nnmatch)
imbalance.nnmatch <- imbalance(group=nnmatch.data$D, data=nnmatch.data, drop=c("Y","D","distance","weights"))
imbalance.nnmatch
reg.nnmatch <- lm(Y~D+pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio)
reg.nnmatch
summary.lm(reg.nnmatch)

## For reference: maha matching on the six "crucial" variables
attach(esample2)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio, nsorteio.fac))
maha.match <- matchit(formula=D~pop+purb+mun_novo+p_secundario+pib_capita_02+nsorteio.fac+lrec, data=balance, method="nearest", distance="mahalanobis", discard="control")
summary(maha.match)
maha.data<-match.data(maha.match)
imbalance.maha <- imbalance(group=maha.data$D, data=maha.data, drop=c("Y","D","distance","weights"))
imbalance.maha
attach(maha.data)
reg.maha <-lm(Y~D+pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio.fac,data=maha.data)
summary(reg.maha)

##########################
#### Generate Table 5 ####
##########################


## Analyze the data for second-term mayors who ran for higher office

require(car)
library(car)
higher <- read.csv("C:/Users/Becca/Dropbox/Gov2001paper/corruptiondata_with_higher_office.csv")
second.term <- higher[higher$esample2==1&higher$first==0,]
attach(second.term)
prefchar2 <- c(pref_masc, pref_idade_tse, pref_escola, party_d1,  party_d3, party_d18)
munichar2 <- c(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)

# Column 1
col1 <- lm(pcorrupt~ran_higher_office)
summary.lm(col1)
var.cov <- hccm(col1, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 2
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
col2 <- lm(pcorrupt~ran_higher_office+pref_masc+pref_idade_tse+party_d1+party_d3+party_d4+party_d5+party_d6+party_d7+party_d8+party_d9+party_d10+party_d11+party_d12+party_d13+party_d14+party_d15+party_d16+party_d17+party_d18)
summary(col2)
var.cov <- hccm(col2, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 3
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
col3 <- lm(pcorrupt~ran_higher_office+prefchar+munichar+lrec_trans)
summary(col3)
# lrec_trans is the log of current transfers received (rec_transf_correntes in the dataset)
var.cov <- hccm(col3, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 4
col4 <- lm(pcorrupt~ran_higher_office+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+comarca)
summary(col4)
var.cov <- hccm(col4, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 5
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col5 <- lm(pcorrupt~ran_higher_office+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+comarca + sorteio)
summary(col5)
# Will not assess coeffs for sorteio10 because of singularity with sorteio1. Stata omits sorteio1 for this reason.
var.cov <- hccm(col5, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 6 -- includes fixed effects
col6 <- lm(pcorrupt~ran_higher_office+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+comarca+sorteio+factor(uf))
summary(col6)
var.cov <- hccm(col6, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 7 -- CEM match
library(MatchIt)
library(cem)
library(Zelig)
D <- ran_higher_office
Y <- pcorrupt
nsorteio.fac <- as.factor(nsorteio)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio.fac))
nsorteio.grp <- list(c("2", "3"), c("4", "5"), c("6", "7"), c("8", "9"), c("10", "11"))
orig.match.higher <- cem(treatment="D", data=balance, drop="Y")
orig.match.higher
est <- att(orig.match.higher, Y~D, data=balance)
est

# Column 8
attach(second.term)
require(AER)
col8 <- tobit(Y ~ ran_higher_office + prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+comarca+sorteio+factor(uf))
require(sandwich)
bread <-vcov(col8)
est.fun <- estfun(col8)
meat <- t(est.fun)%*%est.fun
sandwich <- bread%*%meat%*%bread
library(lmtest)
coeftest(col8, sandwich)

#### Generate Table 6 ####
## Original imbalance
attach(second.term)
uf_d <- cbind(uf_d1, uf_d2, uf_d3, uf_d4, uf_d5, uf_d6, uf_d7, uf_d8, uf_d9, uf_d10, uf_d11, uf_d12, uf_d13, uf_d14, uf_d15, uf_d16, uf_d17, uf_d18, uf_d19, uf_d20, uf_d21, uf_d22, uf_d23, uf_d24, uf_d25, uf_d26)     
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
require(rgenoud)
require(Matching)
Y <- pcorrupt
X <- cbind(pref_masc,pref_idade_tse,party_d1, party_d3, party_d4, party_d5, party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18,lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea,lrec_trans,lrec_fisc,p_cad_pref,vereador_eleit,ENLP2000,comarca, sorteio, factor(uf))
X <- as.matrix(X)
Tr <- ran_higher_office
orig.imb <- Match(Y, Tr, X, Z = X, M=1, BiasAdjust=TRUE, Var.calc = 1)
summary(orig.imb)
orig.imb.data <- as.data.frame(orig.imb$mdata)
orig.imb.D <- orig.imb.data$Tr
imbalance(group=orig.imb.data$Tr,data=orig.imb.data,drop=c("Tr","Y", "orig.weighted.treated.nobs"))


## For reference: nearest-neighbor and maha match
# Nearest neighbor
D <- ran_higher_office
Y <- pcorrupt
nsorteio.fac <- as.factor(nsorteio)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio, nsorteio.fac))
attach(balance)
lrec <- log(rec_transf_correntes)
require(VGAM)
prop.scores <- glm(D~pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio.fac, family = binomial(link = "logit"), data=second.term)
nnmatch <- matchit(formula=D~prop.scores$fitted,data=balance,method="nearest",distance=prop.scores$fitted, discard="control")
nnmatch.data <- match.data(nnmatch)
nnmatch.data
imbalance.nnmatch <- imbalance(group=nnmatch.data$D, data=nnmatch.data, drop=c("Y","D","distance","weights"))
imbalance.nnmatch
reg.nnmatch <- lm(Y~D+pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio.fac)
reg.nnmatch
summary.lm(reg.nnmatch)

# Maha match
attach(second.term)
balance <- as.data.frame(cbind(Y, D, pop, purb, p_secundario, pib_capita_02, mun_novo, rec_transf_correntes, nsorteio, nsorteio.fac))
maha.match <- matchit(formula=D~pop+purb+mun_novo+p_secundario+pib_capita_02+nsorteio.fac+lrec, data=balance, method="nearest", distance="mahalanobis", discard="control")
summary(maha.match)
maha.data<-match.data(maha.match)
imbalance.maha <- imbalance(group=maha.data$D, data=maha.data, drop=c("Y","D","distance","weights"))
imbalance.maha
attach(maha.data)
reg.maha <-lm(Y~D+pop+purb+mun_novo+p_secundario+pib_capita_02+rec_transf_correntes+nsorteio.fac,data=maha.data)
summary(reg.maha)
summary.lm(reg.maha)


##################################
### Appendix: Replication Code ###
##################################

#########################
### Replicate Table 1 ###
#########################
attach(esample2)

# Row 1: Proportion of municipalities with at least one irregularity
row1 <- cbind(dcorrupt_desvio, dcorrupt_licitacao, dcorrupt_superfat, dcorrupt, dmismanagement)
apply(row1,2,FUN=mean,na.rm=T)
# dcorrupt_desvio dcorrupt_licitacao  dcorrupt_superfat           dcorrupt 	
#        0.53571429         0.57563025         0.07142857         0.78571429
#    dmismanagement 
#        0.98633880 
apply(row1,2,FUN=sd,na.rm=T)
# dcorrupt_desvio dcorrupt_licitacao  dcorrupt_superfat           dcorrupt 
#         0.4992476          0.4947670          0.2578103          0.4107576 
#  dmismanagement 
#         0.1162389 

# Column 1 starting at row 2: Conditional on at least one irregularity, diversion of funds
col1.2data <- esample2[dcorrupt_desvio==1,]
attach(col1.2data)
col1.2 <- cbind(ncorrupt_desvio, corrupt_desvio, shcorrupt_desvio, pcorrupt_desvio)
apply(col1.2,2,FUN=mean,na.rm=T)
# ncorrupt_desvio   corrupt_desvio shcorrupt_desvio  pcorrupt_desvio 
#     1.686275e+00     1.592052e+05     4.663310e-02     4.050956e-02 
apply(col1.2,2,FUN=sd,na.rm=T)
#  ncorrupt_desvio   corrupt_desvio shcorrupt_desvio  pcorrupt_desvio 
#    1.005696e+00     3.243038e+05     3.602128e-02     7.195398e-02 

# Column 2 starting at row 2: conditional on at least one irregularity, illegal procurement
attach(esample2)
col2.2data <- esample2[dcorrupt_licitacao==1,]
attach(col2.2data)
col2.2 <- cbind(ncorrupt_licitacao, corrupt_licitacao, shcorrupt_licitacao, pcorrupt_licitacao)
apply(col2.2,2,FUN=mean,na.rm=T)
# ncorrupt_licitacao   corrupt_licitacao shcorrupt_licitacao  pcorrupt_licitacao 
#       1.656934e+00        2.914315e+05        4.499680e-02        6.988787e-02 
apply(col2.2,2,FUN=sd,na.rm=T)
# ncorrupt_licitacao   corrupt_licitacao shcorrupt_licitacao  pcorrupt_licitacao 
#       9.449147e-01        5.782721e+05        2.784699e-02        9.346220e-02 

# Column 3 starting at row 2: conditional on at least one irregularity, overinvoicing
attach(esample2)
col3.2data <- esample2[dcorrupt_superfat==1,]
attach(col3.2data)
col3.2 <- cbind(ncorrupt_superfat, corrupt_superfat, shcorrupt_superfat, pcorrupt_superfat)
apply(col3.2,2,FUN=mean,na.rm=T)
# ncorrupt_superfat   corrupt_superfat shcorrupt_superfat  pcorrupt_superfat 
#      1.029412e+00       6.067014e+04       2.884408e-02       1.506680e-02 
apply(col3.2,2,FUN=sd,na.rm=T)
# ncorrupt_superfat   corrupt_superfat shcorrupt_superfat  pcorrupt_superfat 
#      1.714986e-01       1.667338e+05       1.242569e-02       3.550966e-02


# Column 4 starting at row 2: conditional on at least one irregularity, corruption indicator
attach(esample2)
col4.2data <- esample2[dcorrupt==1,]
attach(col4.2data)
col4.2 <- cbind(ncorrupt, valor_corrupt, ncorrupt_os, pcorrupt)
apply(col4.2,2,FUN=mean,na.rm=T)
#  ncorrupt valor_corrupt   ncorrupt_os      pcorrupt 
# 2.457219e+00  3.275731e+05  6.738306e-02  8.019113e-02 
apply(col4.2,2,FUN=sd,na.rm=T)
#     ncorrupt valor_corrupt   ncorrupt_os      pcorrupt 
# 1.554045e+00  6.275142e+05  5.014698e-02  1.086024e-01 

# sum ncorrupt_superfat corrupt_superfat shcorrupt_superfat pcorrupt_superfat if dcorrupt_superfat==1&esample2==1;
# sum ncorrupt valor_corrupt ncorrupt_os pcorrupt if dcorrupt==1&esample2==1;
# sum pmismanagement if esample2==1&dmismanagement>0;
# sum dmismanagement if esample2==1;

# Column 5 starting at row 2: conditional on at least one irregularity, mismanagement
attach(esample2)
col5.2data <- esample2[dmismanagement>0,]
attach(col5.2data)
col5.2 <- cbind(pmismanagement, dmismanagement)
apply(col5.2,2,FUN=mean,na.rm=T)
# pmismanagement dmismanagement 
#      1.646702       1.000000 
apply(col5.2,2,FUN=sd,na.rm=T)
# pmismanagement dmismanagement 
#      1.154131       0.000000  

#########################
### Replicate table 2 ###
#########################
attach(esample2)
table2 <- cbind(ncorrupt_desvio, shcorrupt_desvio, pcorrupt_desvio, ncorrupt_licitacao, shcorrupt_licitacao, pcorrupt_licitacao, ncorrupt_superfat, shcorrupt_superfat, pcorrupt_superfat, ncorrupt, ncorrupt_os, pcorrupt)
table2a <- (lm(ncorrupt_desvio~first+reeleito-1))$coef
table2a
table2b <- (lm(shcorrupt_desvio~first+reeleito-1))$coef
table2b
table2c <- (lm(pcorrupt_desvio~first+reeleito-1))$coef
table2c
table2d <- (lm(ncorrupt_licitacao~first+reeleito-1))$coef
table2d
table2e <- (lm(shcorrupt_licitacao~first+reeleito-1))$coef
table2e
table2f <- (lm(pcorrupt_licitacao~first+reeleito-1))$coef
table2f
table2g <- (lm(ncorrupt_superfat~first+reeleito-1))$coef
table2g
table2h <- (lm(shcorrupt_superfat~first+reeleito-1))$coef
table2h
table2i <- (lm(pcorrupt_superfat~first+reeleito-1))$coef
table2i
table2j <- (lm(ncorrupt~first+reeleito-1))$coef
table2j
table2k <- (lm(ncorrupt_os~first+reeleito-1))$coef
table2k
table2l <- (lm(pcorrupt~first+reeleito-1))$coef
table2l

table2m <- summary.lm(lm(ncorrupt_desvio~reeleito))
table2m
table2n <- summary.lm(lm(shcorrupt_desvio~reeleito))
table2n
table2o <- summary.lm(lm(pcorrupt_desvio~reeleito))
table2o
table2p <- summary.lm(lm(ncorrupt_licitacao~reeleito))
table2p
table2q <- summary.lm(lm(shcorrupt_licitacao~reeleito))
table2q
table2r <- summary.lm(lm(pcorrupt_licitacao~reeleito))
table2r
table2s <- summary.lm(lm(ncorrupt_superfat~reeleito))
table2s
table2t <- summary.lm(lm(shcorrupt_superfat~reeleito))
table2t
table2u <- summary.lm(lm(pcorrupt_superfat~reeleito))
table2u
table2v <- summary.lm(lm(ncorrupt~reeleito))
table2v
table2w <- summary.lm(lm(ncorrupt_os~reeleito))
table2w
table2x <- summary.lm(lm(pcorrupt~reeleito))
table2x

#########################
### Replicate table 3 ###
#########################
require(car)
attach(esample2)
first.term <- esample2[first==1,]
second.term <- esample2[reeleito==1,]

mean.first.men <- mean(first.term$pref_masc)
mean.first.men
mean.second.men <- mean(second.term$pref_masc)
mean.second.men
se.diff.men <- lm(pref_masc~first)
summary.lm(se.diff.men)
var.cov <- hccm(se.diff.men, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.school <- mean(first.term$pref_escola)
mean.first.school
mean.second.school <- mean(second.term$pref_escola)
mean.second.school
se.diff.school <- lm(pref_escola~first,data=esample2)
summary.lm(se.diff.school)
var.cov <- hccm(se.diff.school, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.age <- mean(first.term$pref_idade_tse)
mean.first.age
mean.second.age <- mean(second.term$pref_idade_tse)
mean.second.age
se.diff.age <- lm(pref_idade_tse~first,data=esample2)
summary.lm(se.diff.age)
var.cov <- hccm(se.diff.age, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.pop <- mean(first.term$pop)
mean.first.pop 
mean.second.pop <- mean(second.term$pop)
mean.second.pop 
se.diff.pop <- lm(pop~first,data=esample2)
summary.lm(se.diff.pop)
var.cov <- hccm(se.diff.pop, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.purb <- mean(first.term$purb)
mean.first.purb 
mean.second.purb <- mean(second.term$purb)
mean.second.purb 
se.diff.purb <- lm(purb~first,data=esample2)
summary.lm(se.diff.purb)
var.cov <- hccm(se.diff.purb, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.p_secundario<- mean(first.term$p_secundario)
mean.first.p_secundario
mean.second.p_secundario<- mean(second.term$p_secundario)
mean.second.p_secundario
se.diff.p_secundario<- lm(p_secundario~first,data=esample2)
summary.lm(se.diff.p_secundario)
var.cov <- hccm(se.diff.p_secundario, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.mun_novo<- mean(first.term$mun_novo)
mean.first.mun_novo
mean.second.mun_novo<- mean(second.term$mun_novo)
mean.second.mun_novo
se.diff.mun_novo<- lm(mun_novo~first,data=esample2)
summary.lm(se.diff.mun_novo)
var.cov <- hccm(se.diff.mun_novo, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.pib_capita_02<- mean(first.term$pib_capita_02)
mean.first.pib_capita_02
mean.second.pib_capita_02<- mean(second.term$pib_capita_02)
mean.second.pib_capita_02
se.diff.pib_capita_02 <- lm(pib_capita_02~first,data=esample2)
summary.lm(se.diff.pib_capita_02)
var.cov <- hccm(se.diff.pib_capita_02, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.gini_ipea <- mean(first.term$gini_ipea)
mean.first.gini_ipea
mean.second.gini_ipea<- mean(second.term$gini_ipea)
mean.second.gini_ipea
se.diff.gini_ipea<- lm(gini_ipea~first,data=esample2)
summary.lm(se.diff.gini_ipea)
var.cov <- hccm(se.diff.gini_ipea, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.rec_transf <- mean(first.term$rec_transf)
mean.first.rec_transf
mean.second.rec_transf<- mean(second.term$rec_transf)
mean.second.rec_transf
se.diff.rec_transf <- lm(esample2$rec_transf~first,data=esample2)
summary.lm(se.diff.rec_transf)
var.cov <- hccm(se.diff.rec_transf, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.op_01_04 <- mean(first.term$op_01_04)
mean.first.op_01_04
mean.second.op_01_04<- mean(second.term$op_01_04)
mean.second.op_01_04
se.diff.op_01_04 <- lm(esample2$op_01_04~first,data=esample2)
summary.lm(se.diff.op_01_04)
var.cov <- hccm(se.diff.opp_01_04, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.p_cad_pref <- mean(first.term$p_cad_pref)
mean.first.p_cad_pref
mean.second.p_cad_pref<- mean(second.term$p_cad_pref)
mean.second.p_cad_pref
se.diff.p_cad_pref <- lm(p_cad_pref~first,data=esample2)
summary.lm(se.diff.p_cad_pref)
var.cov <- hccm(se.diff.p_cad_pref, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.vereador_eleit <- mean(first.term$vereador_eleit)
mean.first.vereador_eleit
mean.second.vereador_eleit<- mean(second.term$vereador_eleit)
mean.second.vereador_eleit
se.diff.vereador_eleit <- lm(vereador_eleit~first,data=esample2)
summary.lm(se.diff.vereador_eleit)
var.cov <- hccm(se.diff.vereador_eleit, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.ENLP2000 <- mean(first.term$ENLP2000)
mean.first.ENLP2000 
mean.second.ENLP2000 <- mean(second.term$ENLP2000 )
mean.second.ENLP2000 
se.diff.ENLP2000 <- lm(ENLP2000~first,data=esample2)
summary.lm(se.diff.ENLP2000)
var.cov <- hccm(se.diff.ENLP2000, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.winmargin <- mean(first.term$winmargin)
mean.first.winmargin 
mean.second.winmargin <- mean(second.term$winmargin)
mean.second.winmargin  
se.diff.winmargin <- lm(winmargin~first,data=esample2)
summary.lm(se.diff.winmargin)
var.cov <- hccm(se.diff.winmargin, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.comarca <- mean(first.term$comarca )
mean.first.comarca 
mean.second.comarca <- mean(second.term$comarca )
mean.second.comarca 
se.diff.comarca <- lm(comarca~first,data=esample2)
summary.lm(se.diff.comarca)
var.cov <- hccm(se.diff.comarca, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.media2  <- mean(first.term$media2)
mean.first.media2  
mean.second.media2  <- mean(second.term$media2)
mean.second.media2  
se.diff.media2  <- lm(media2~first,data=esample2)
summary.lm(se.diff.media2)
var.cov <- hccm(se.diff.media2, "hc1")
ses <- sqrt(diag(var.cov))
ses

mean.first.totrecursos <- mean(first.term$totrecursos)
mean.first.totrecursos 
mean.second.totrecursos <- mean(second.term$totrecursos)
mean.second.totrecursos 
se.diff.totrecursos <- lm(totrecursos~first,data=esample2)
summary.lm(se.diff.totrecursos)
var.cov <- hccm(se.diff.totrecursos, "hc1")
ses <- sqrt(diag(var.cov))
ses

first.tot_os <- na.omit(first.term$tot_os)
mean.first.tot_os<- mean(first.tot_os)
mean.first.tot_os
second.tot_os <- na.omit(second.term$tot_os)
mean.second.tot_os<- mean(second.tot_os)
mean.second.tot_os
use <- cbind(esample2$tot_os, esample2$first)
use <- na.omit(use)
use <- data.frame(use)
attach(use)
se.diff.tot_os<- lm(use$X1~use$X2,data=use)
summary.lm(se.diff.tot_os)
var.cov <- hccm(se.diff.tot_os, "hc1")
ses <- sqrt(diag(var.cov))
ses

#########################
### Replicate table 5 ###
#########################

# Column 1
col5.1 <- lm(esample2$ncorrupt~esample2$first)
summary.lm(col5.1)
var.cov <- hccm(col5.1, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 2
col5.2 <- lm(ncorrupt~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col5.2)
var.cov <- hccm(col5.2, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 3
Y.5.3 <- ncorrupt
col5.3 <- Match(Y.5.3, Tr, X, Z = X, M=3, BiasAdjust=TRUE, Var.calc = 3)
summary(col5.3)
# This is different from what's reported in the article -- the article reports -.339, this reports -.35.

# Column 4
col5.4 <- glm.nb(ncorrupt~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col5.4)
# This is very different from what's reported -- they report -.456, we find -.293
require(sandwich)
bread <-vcov(col5.4)
est.fun <- estfun(col5.4)
meat <- t(est.fun)%*%est.fun
sandwich <- bread%*%meat%*%bread
library(lmtest)
coeftest(col5.4, sandwich)


# Column 5
col5.5 <- lm(esample2$ncorrupt_os~esample2$first)
summary.lm(col5.5)
var.cov <- hccm(col5.5, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 6
col5.6 <- lm(ncorrupt_os~first+prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col5.6)
var.cov <- hccm(col5.6, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 7
Y.5.7 <- ncorrupt_os
col5.7 <- Match(Y.5.7, Tr, X, Z = X, M=3, BiasAdjust=TRUE, Var.calc = 3)
summary(col5.7)

# Column 8
col5.8 <- tobit(Y.5.7 ~ first + prefchar+munichar+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
require(sandwich)
bread <-vcov(col5.8)
est.fun <- estfun(col5.8)
meat <- t(est.fun)%*%est.fun
sandwich <- bread%*%meat%*%bread
library(lmtest)
coeftest(col5.8, sandwich)
# -0.01426786  0.00505626 , as compared to -.009, .005 in the paper.


#########################
### Replicate table 7 ###
#########################

attach(esample2)
first_experienced <- rep(0, 476)
for(i in 1:476){
if(reeleito[i]==0&exp_prefeito[i]==1) first_experienced[i]=1
}
experience1 <- rep(0,476)
for(i in 1:476){
if(first_experienced[i]==1 | reeleito[i]==1) experience1[i]=1
}
experience2 <- rep(0,476)
for(i in 1:476){
if(reeleito[i]==1 | reeleito_2004[i]==1) experience2[i]=1
}
experience3 <- rep(0,476)
for(i in 1:476){
if(reeleito[i]==1 | elected1[i]==1) experience3[i]=1
}
### There are lots of NA's in vereador9600, so I've replaced them with zeros.
vereador9600[is.na(vereador9600)] <- 0
nexp <- rep(0,476)
for(i in 1:476){
nexp[i]=sum(exp_prefeito[i],vereador9600[i])
}
nexp <- nexp*4
nexp2 <- nexp^2
table7 <- cbind(esample2, first_experienced, experience1, experience2, experience3, nexp, nexp2)

# Column 1
col7.1.data <- table7[experience2==1,]
attach(col7.1.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col7.1 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col7.1.data)
summary.lm(col7.1)
var.cov <- hccm(col7.1, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 2
attach(table7)
col7.2.data <- table7[experience3==1,]
attach(col7.2.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col7.2 <- lm(pcorrupt~first+pref_masc+pref_idade_tse+party_d1+party_d3+party_d4+party_d5+party_d6+party_d7+party_d8+party_d9+party_d10+party_d11+party_d12+party_d13+party_d14+party_d15+party_d16+party_d17+party_d18+lpop+purb+ p_secundario+mun_novo+lpib02+gini_ipea+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col7.2.data)
summary.lm(col7.2)
var.cov <- hccm(col7.2, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 3
attach(table7)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col7.3 <- lm(pcorrupt~first+exp_prefeito+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=table7)
summary.lm(col7.3)
# the estimate and std error for "first" and "exp_prefeito" match almost exactly.
var.cov <- hccm(col7.3, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 4
col7.4 <- lm(pcorrupt~first+nexp+nexp2+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=table7)
summary.lm(col7.4)
var.cov <- hccm(col7.4, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 5
col7.5.data <- table7[experience1==1,]
attach(col7.5.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col7.5 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col7.5.data)
summary.lm(col7.5)
# We get -.040 for this, the article gets -.038
var.cov <- hccm(col7.5, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 6
attach(table7)
col7.6.data <- table7[first==0 | (first==1&(exp_prefeito==1|vereador9600==1)),]
attach(col7.6.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col7.6 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col7.6.data)
summary.lm(col7.6)
var.cov <- hccm(col7.6, "hc1")
ses <- sqrt(diag(var.cov))
ses

#########################
### Replicate table 8 ###
#########################

# Column 1
attach(esample2)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col8.1 <- lm(pmismanagement~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=esample2)
summary.lm(col8.1)
# Pt est is the same, std error off (.102 in the article, .116 here)
var.cov <- hccm(col8.1, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 2
col8.2.data <- col7.1.data
attach(col8.2.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col8.2 <- lm(pmismanagement~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col8.2.data)
summary.lm(col8.2)
var.cov <- hccm(col8.2, "hc1")
ses <- sqrt(diag(var.cov))
ses


# Column 3
col8.3.data <- col7.5.data
attach(col8.3.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col8.3 <- lm(pmismanagement~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col8.3.data)
summary.lm(col8.3)
var.cov <- hccm(col8.3, "hc1")
ses <- sqrt(diag(var.cov))
ses

# Column 4
attach(esample2)
col8.4.data <- esample2[!is.na(esample2$pmismanagement),]
attach(col8.4.data)
prefchar <- cbind(pref_masc,pref_idade_tse,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar <- cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio <- cbind(sorteio1, sorteio2, sorteio3, sorteio4, sorteio5, sorteio6, sorteio7, sorteio8, sorteio9, sorteio10)
col8.4 <- lm(pcorrupt~first+prefchar+munichar+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf), data=col8.4.data)
summary.lm(col8.4)
var.cov <- hccm(col8.4, "hc1")
ses <- sqrt(diag(var.cov))
ses

#--------------------
#     Table 9
#--------------------
require(sandwich)
require(lmtest)

conveniousdata<-read.dta("C:/Users/Becca/Desktop/Spring 2012 Courses/Gov 2001/Replication paper/Ferraz and Finan/conveniosdata_aer.dta")
#conveniousdata<-read.dta("/Users/Pam/Documents/Gov2001/Gov2001paper/Ferraz_Finan_data/conveniosdata_aer.dta")
attach(conveniousdata)

ano2001<-rep(0,1904)
ano2002<-rep(0,1904)
ano2003<-rep(0,1904)
ano2004<-rep(0,1904)
for(i in 1:1904){
	if(ano[i]==2001){ano2001[i]<-1}
	if(ano[i]==2002){ano2002[i]<-1}
	if(ano[i]==2003){ano2003[i]<-1}	
	if(ano[i]==2004){ano2004[i]<-1}
}

tr_ano2001<-first*ano2001
tr_ano2002<-first*ano2002
tr_ano2003<-first*ano2003
tr_ano2004<-first*ano2004

ano_lpib02 <- ano*lpib02
ano_purb <- ano*purb
ano_p_secundario <- ano*p_secundario
ano_mun_novo <- ano*mun_novo
ano_gini_ipea <- ano*gini_ipea
ano_pref_masc <- ano*pref_masc
ano_pref_idade_tse <- ano*pref_idade_tse
ano_pref_escola <- ano*pref_escola
ano_p_cad_pref <- ano*p_cad_pref
ano_vereador_eleit <- ano*vereador_eleit
ano_ENLP2000 <- ano*ENLP2000
ano_comarca <- ano*comarca

table9<-cbind(conveniousdata,tr_ano2001,tr_ano2002,tr_ano2003,tr_ano2004,ano_lpib02,ano_purb,ano_p_secundario,ano_mun_novo,ano_gini_ipea,ano_pref_masc,ano_pref_idade_tse,ano_pref_escola,ano_p_cad_pref,ano_vereador_eleit,ano_ENLP2000,ano_comarca)
attach(table9)
prefchar2<-cbind(pref_masc,pref_idade_tse,pref_escola,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar2<-cbind(lpop,purb,p_secundario,mun_novo,lpib02,gini_ipea)

col1<-lm(dconvenios~tr_ano2004+tr_ano2003+tr_ano2002+first+ano2002+ano2003+ano2004+prefchar2+munichar2+p_cad_pref+vereador_eleit+ENLP2000+comarca)
 #clustering by cod_ibge6?
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col1$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col1),2, function(x) tapply(x, cod_ibge6, sum));
vcovCL <- dfc*sandwich(col1, meat=crossprod(uj)/N)*dfcw
coeftest(col1, vcovCL) 

col2<-lm(dconvenios~first+tr_ano2002+tr_ano2003+tr_ano2004+ano2002+ano2003+ano2004+ano_lpib02+ano_purb+ano_p_secundario+ano_mun_novo+ano_gini_ipea+ano_pref_masc+ano_pref_idade_tse+ano_pref_escola+ano_p_cad_pref+ano_vereador_eleit+ano_ENLP2000+ano_comarca)
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col2$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col2),2, function(x) tapply(x, cod_ibge6, sum));
vcovCL <- dfc*sandwich(col2, meat=crossprod(uj)/N)*dfcw
coeftest(col2, vcovCL) 

col3<-lm(lconvenios_pc~tr_ano2004+tr_ano2003+tr_ano2002+first+ano2002+ano2003+ano2004+prefchar2+munichar2+p_cad_pref+vereador_eleit+ENLP2000+comarca)
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col3$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col3),2, function(x) tapply(x, cod_ibge6, sum));
vcovCL <- dfc*sandwich(col3, meat=crossprod(uj)/N)*dfcw
coeftest(col3, vcovCL) 

col4<-lm(lconvenios_pc~first+tr_ano2002+tr_ano2003+tr_ano2004+ano2002+ano2003+ano2004+ano_lpib02+ano_purb+ano_p_secundario+ano_mun_novo+ano_gini_ipea+ano_pref_masc+ano_pref_idade_tse+ano_pref_escola+ano_p_cad_pref+ano_vereador_eleit+ano_ENLP2000+ano_comarca+factor(cod_ibge6))
summary.lm(col4)
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col4$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col4),2, function(x) tapply(x, cod_ibge6, sum))
vcovCL <- dfc*sandwich(col4, meat=crossprod(uj)/N)*dfcw
coeftest(col4, vcovCL) 
 
col5<-lm(msh_liberado~first+tr_ano2002+tr_ano2003+tr_ano2004+ano2002+ano2003+ano2004+prefchar2+munichar2+p_cad_pref+vereador_eleit+ENLP2000+comarca)
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col5$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col5),2, function(x) tapply(x, cod_ibge6, sum))
vcovCL <- dfc*sandwich(col5, meat=crossprod(uj)/N)*dfcw
coeftest(col5, vcovCL) 
 
col6<-lm(msh_liberado~first+tr_ano2002+tr_ano2003+tr_ano2004+ano2002+ano2003+ano2004+ano_lpib02+ano_purb+ano_p_secundario+ano_mun_novo+ano_gini_ipea+ano_pref_masc+ano_pref_idade_tse+ano_pref_escola+ano_p_cad_pref+ano_vereador_eleit+ano_ENLP2000+ano_comarca+factor(cod_ibge6))
 #test tr_ano2003= tr_ano2002
 #test tr_ano2004= tr_ano2003
 #test tr_ano2004= tr_ano2002
M <- length(unique(cod_ibge6))   
N <- length(cod_ibge6)           
K <- col6$rank                        
dfc <- (M/(M-1))*((N-1)/(N-K))  
dfcw <- 1        
uj  <- apply(estfun(col6),2, function(x) tapply(x, cod_ibge6, sum))
vcovCL <- dfc*sandwich(col6, meat=crossprod(uj)/N)*dfcw
coeftest(col6, vcovCL) 
 
#--------------------
#     Table 10
#--------------------

attach(corruptiondata)
h_ENEP2000<- 1/ENEP2000
first_comarca<- comarca * first
first_p_cad_pref<- p_cad_pref * first
first_media2 <- media2 * first
first_h_ENEP2000 <- h_ENEP2000 * first

table10<-cbind(corruptiondata,h_ENEP2000,first_comarca,first_p_cad_pref,first_media2,first_h_ENEP2000)
table10<-table10[table10$esample2==1,]

attach(table10)
prefchar2<-cbind(pref_masc,pref_idade_tse,pref_escola,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar2<-cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio<-cbind(sorteio1,sorteio2,sorteio3,sorteio4,sorteio5,sorteio6,sorteio7,sorteio8,sorteio9,sorteio10)

col1<-lm(pcorrupt~first+first_comarca+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio)
summary.lm(col1)
var.cov <- hccm(col1, "hc1")
ses <- sqrt(diag(var.cov))
ses

col2<-lm(pcorrupt~first+first_media2+media2+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio)
summary.lm(col2)
var.cov <- hccm(col2, "hc1")
ses <- sqrt(diag(var.cov))
ses

col3<-lm(pcorrupt~first+first_p_cad_pref+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio)
summary.lm(col3)
var.cov <- hccm(col3, "hc1")
ses <- sqrt(diag(var.cov))
ses

col4<-lm(pcorrupt~first+first_h_ENEP2000+h_ENEP2000+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio)
summary.lm(col4)
var.cov <- hccm(col4, "hc1")
ses <- sqrt(diag(var.cov))
ses

#--------------------
#     Table 11
#--------------------

attach(corruptiondata)
sorteio_electyear<-rep(NA,476)
for(i in 1:476){
  if(nsorteio[i]>5 & nsorteio[i]<10){sorteio_electyear[i]=1}
  else{sorteio_electyear[i]=0}
}
first_sorteio_electyear<-first*sorteio_electyear
first_PT<-first*party_d15
first_samepartygov98<-first*samepartygov98 #manipulation same party governor
lrecursos_fisc<-log(valor_fiscalizado) #manipulation of fiscalizacoes

table11<-cbind(corruptiondata,first_sorteio_electyear,first_PT,first_samepartygov98,lrecursos_fisc)
table11<-table11[table11$esample2==1,]
attach(table11)
prefchar2<-cbind(pref_masc,pref_idade_tse,pref_escola,party_d1,party_d3,party_d4,party_d5,party_d6,party_d7,party_d8,party_d9,party_d10,party_d11,party_d12,party_d13,party_d14,party_d15,party_d16,party_d17,party_d18)
munichar2<-cbind(lpop, purb, p_secundario, mun_novo, lpib02, gini_ipea)
sorteio<-cbind(sorteio1,sorteio2,sorteio3,sorteio4,sorteio5,sorteio6,sorteio7,sorteio8,sorteio9,sorteio10)

col1<-lm(pcorrupt~first+first_sorteio_electyear+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col1)
var.cov <- hccm(col1, "hc1")
ses <- sqrt(diag(var.cov))
ses

col2<-lm(pcorrupt~first+first_PT+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col2)
var.cov <- hccm(col2, "hc1")
ses <- sqrt(diag(var.cov))
ses

col3<-lm(pcorrupt~first+first_samepartygov98+samepartygov98+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col3)
var.cov <- hccm(col3, "hc1")
ses <- sqrt(diag(var.cov))
ses

col4<-lm(lrecursos_fisc~first+prefchar2+munichar2+lrec_trans+p_cad_pref+vereador_eleit+ENLP2000+comarca+lfunc_ativ+sorteio+factor(uf))
summary.lm(col4)
var.cov <- hccm(col4, "hc1")
ses <- sqrt(diag(var.cov))
ses

col5<-lm(lrecursos_fisc~first+first_sorteio_electyear+sorteio_electyear+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col5)
var.cov <- hccm(col5, "hc1")
ses <- sqrt(diag(var.cov))
ses

col6<-lm(lrecursos_fisc~first+first_PT+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col6)
var.cov <- hccm(col6, "hc1")
ses <- sqrt(diag(var.cov))
ses

col7<-lm(lrecursos_fisc~first+first_samepartygov98+samepartygov98+prefchar2+munichar2+lrec_trans+lfunc_ativ+p_cad_pref+vereador_eleit+ENLP2000+comarca+sorteio+factor(uf))
summary.lm(col7)
var.cov <- hccm(col7, "hc1")
ses <- sqrt(diag(var.cov))
ses




